
import os
import time
import requests
from bs4 import BeautifulSoup
import lxml

# Default HTTP headers passed to every requests.get() call in this script:
# a browser-like User-Agent plus a Referer for the site's listing page.
# NOTE: this dict is shared module state -- get_all() overwrites "Referer"
# with the current album URL before requesting that album's pages and images.
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36",
    "Referer": "http://www.mm131.com/mingxing/"
}


# Fetch the image link
def get_img_url(page_url, timeout=10):
    """Return the URL of the image shown on one gallery page.

    The page is downloaded with the module-level ``headers`` and the ``src``
    of the first ``<img>`` inside ``<div class="content-pic">`` is returned.

    Args:
        page_url: URL of the gallery page to inspect.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        The image URL, resolved against ``page_url`` so that relative or
        protocol-relative ``src`` values are still downloadable.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the page has no ``div.content-pic`` / ``img`` / ``src``.
    """
    # Local import keeps this block self-contained; urljoin is stdlib.
    from urllib.parse import urljoin

    response = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    container = soup.find('div', class_='content-pic')
    img_tag = container.find('img') if container is not None else None
    src = img_tag.get('src') if img_tag is not None else None
    if not src:
        # Name the offending page instead of surfacing an opaque
        # "'NoneType' object has no attribute 'find'" error.
        raise ValueError("no image found in div.content-pic on %s" % page_url)

    # An absolute src is returned unchanged by urljoin.
    return urljoin(page_url, src)


# Download an image
def download_img(img_url, dir_name, timeout=10):
    """Download ``img_url`` into ``dir_name``.

    The file name is the last ``/``-separated segment of ``img_url``. The
    request is sent with the module-level ``headers``.

    Args:
        img_url: Direct URL of the image.
        dir_name: Existing directory in which the image is stored.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (nothing is written to disk in that case).
        OSError: If the target file cannot be written.
    """
    file_name = img_url.split('/')[-1]
    file_path = os.path.join(dir_name, file_name)

    response = requests.get(img_url, headers=headers, timeout=timeout)
    # Check the status before opening the file so an error page is never
    # saved under an image file name.
    response.raise_for_status()

    # 'wb', not 'ab': appending on a re-run would concatenate a second copy
    # of the bytes onto the existing file and corrupt the image.
    with open(file_path, 'wb') as f:
        f.write(response.content)


# Create a folder
def mkdir(f_dir_name, path):
    """Ensure the directory ``f_dir_name/path`` exists and return its path.

    Leading and trailing whitespace is stripped from ``path`` before it is
    joined onto ``f_dir_name``. Missing parent directories are created too.
    A status line is printed saying whether the folder was created or
    already existed.

    Args:
        f_dir_name: Parent directory.
        path: Name of the child directory.

    Returns:
        The joined directory path on success, or ``None`` if the directory
        could not be prepared (the reason is printed). Callers are expected
        to test the result for truthiness.
    """
    try:
        path = path.strip()
        child_dir_name = os.path.join(f_dir_name, path)
        if os.path.exists(child_dir_name):
            print("文件夹%s已存在!" % (path))
        else:
            print("新建文件夹:%s" % (path))
            # exist_ok=True tolerates the directory being created by someone
            # else between the exists() check above and this call.
            os.makedirs(child_dir_name, exist_ok=True)
        return child_dir_name
    except (OSError, ValueError, TypeError, AttributeError) as err:
        # Only the failures this code can actually produce are handled:
        # filesystem errors, invalid path strings, and non-string arguments.
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit
        # and hide the reason for the failure.
        print("创建文件夹失败: %s" % (err,))
        return None


def get_all(timeout=10):
    """Crawl the listing page and download every page image of every album.

    For each album link found in ``<dl class="public-box">`` on the listing
    page, the album's first page is fetched to read the page count, a folder
    named after the link text is created under ``./mx/``, and the image of
    each album page is downloaded into it.

    Side effect: the module-level ``headers["Referer"]`` is set to the
    current album URL before that album's requests are made.

    Args:
        timeout: Seconds to wait on the listing-page and album-page requests
            issued directly by this function.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the listing page or an album page lacks the expected
            markup, or the page count is not an integer.
    """
    start_url = 'http://www.mm131.com/mingxing'
    start_html = requests.get(start_url, headers=headers, timeout=timeout)
    start_html.raise_for_status()
    # The pages are decoded as gb18030 so the Chinese album titles used as
    # folder names come out correctly.
    start_html.encoding = 'gb18030'

    album_box = BeautifulSoup(start_html.text, 'lxml').find('dl', class_='public-box')
    if album_box is None:
        raise ValueError("no dl.public-box found on %s" % start_url)
    album_links = album_box.find_all('a', class_="", attrs={"target": "_blank"})

    for link in album_links:
        a_text = link.get_text()  # link text, used as the folder name
        a_href = link["href"]  # URL of the album's first page
        headers["Referer"] = a_href

        html = requests.get(a_href, headers=headers, timeout=timeout)
        html.raise_for_status()
        html.encoding = 'gb18030'

        page_span = BeautifulSoup(html.text, 'lxml').find('span', class_='page-ch')
        if page_span is None:
            raise ValueError("no span.page-ch (page count) found on %s" % a_href)
        # The first and last characters of the span text are dropped and the
        # remainder parsed as the page count (presumably text like "共N页").
        max_page = int(page_span.get_text()[1:-1])

        # All images of one album are stored in the same folder.
        to_save_path = mkdir("./mx/", a_text)
        print("当前套图共%s页" % (max_page))
        if not to_save_path:
            # mkdir() signals failure with None; skip this album.
            continue

        for page in range(1, max_page + 1):
            # Page 1 is the album URL itself; page N is "<name>_N.html".
            if page == 1:
                page_url = a_href
            else:
                page_url = a_href.replace(".html", "_" + str(page) + ".html")
            img_url = get_img_url(page_url)
            download_img(img_url, to_save_path)


# Start the crawl only when this file is run as a script, not when imported.
if __name__ == "__main__":
    get_all()

"""
本次作业在参考了技术文档后最终还是参考了游于艺同学的作业。
对比Eric老师的示例文档后，想加一个将文件保存在特定文件夹里方式
仿照老师的文档添加了几处之后，出现了之前没出问题的句段多处参数报错。
调整个把小时无果，只能先回到这个能交作业的版本，明天再研究研究。
"""
